#clean up
rm(list=ls())

#set seed
set.seed(715130425)

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(maps)
library(spdep)
library(rgdal)
library(prevR)
library(car)
library(xtable)

#options
options(scipen=8)

#Set to working directory. Adjust the command below to reflect YOUR working directory.
#setwd('/Volumes/MONOGAN/gillAreal/ccesCode/')
#setwd('/Users/jamie/Documents/gillAreal/ccesCode/')
#setwd('/Users/jamie/Desktop/nhgis0003_shape/nhgis0003_shapefile_us_tract_1970/')

#load survey data
combined<-read.dta('cces08reformatted.dta')

#load ZIP code data
zip.0<-read.fwf('zcta5.txt', widths=c(2,5,59,9,9,14,14,12,12,10,10),col.names=c('state','zip','zcta','population','housingUnits','landMeters','waterMeters','landMiles','waterMiles','latitude','longitude'))
zip.0$zipLandKM<-zip.0$landMeters/(1000^2)
zip.0$zipPop<-zip.0$population
zip<-subset(zip.0, subset=state!='AK'&state!='HI'&state!='PR', select=c(state,zip,latitude,longitude,zipPop,zipLandKM))

###EMPIRICAL SEMIVARIOGRAM###
ideol.vario<-variog(coords=cbind(combined$eastings,combined$northings), data=combined$ideology)
ideol.robust<-variog(coords=cbind(combined$eastings,combined$northings), data=combined$ideology, estimator.type="modulus")

#pdf('rawVariogram.pdf')
plot(ideol.vario, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,800))
par(new=T)
plot(ideol.robust, xlab="", ylab="",axes=F,ylim=c(600,800),pch='+', col='blue')
#dev.off()

###CHOOSE A TREND TERM###
#training v. out-of-sample forecast
n<-dim(combined)[1]
ninety<-sample(1:n,round(.9*n))
training<-combined[ninety,]
forecasting<-combined[-ninety,]

#five versions of the trend
none<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat),data=training);summary(none)#adj r^2=.1308
ssr<-sum((predict(none,forecasting)-mean(predict(none,forecasting)))^2)
sst<-sum((forecasting$ideology-mean(forecasting$ideology))^2)
ssr/sst #.1395993
(19389^(33/19389))*(sst-ssr)/19389 #65.9077

linear<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings+northings,data=training);summary(linear)#adj r^2=.1365
ssr.1<-sum((predict(linear,forecasting)-mean(predict(linear,forecasting)))^2)
ssr.1/sst #.1469531
(19389^(35/19389))*(sst-ssr.1)/19389 #65.41097

quadratic<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2),data=training);summary(quadratic)#adj r^2=.1362
ssr.2<-sum((predict(quadratic,forecasting)-mean(predict(quadratic,forecasting)))^2)
ssr.2/sst #.1488206
19389^(38/19389)*(sst-ssr.2)/19389 #65.36754

cubic<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2)+I(eastings^2)*northings+eastings*I(northings^2)+I(eastings^3)+I(northings^3),data=training);summary(cubic)#adj r^2=.1385
ssr.3<-sum((predict(cubic,forecasting)-mean(predict(cubic,forecasting)))^2)
ssr.3/sst #.1501831
19389^(42/19389)*(sst-ssr.3)/19389 #65.39596

quartic<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)*I(northings^2)+I(eastings^2)*northings+eastings*I(northings^2)+I(eastings^3)+I(northings^3)+northings*I(eastings^3)+eastings*I(northings^3)+I(eastings^4)+I(northings^4),data=training);summary(quartic)#adj r^2=.1388
ssr.4<-sum((predict(quartic,forecasting)-mean(predict(quartic,forecasting)))^2)
ssr.4/sst #.1504514
19389^(47/19389)*(sst-ssr.4)/19389 #65.54197

###QUADRATIC AS THE MODEL; CUBIC AND NO TREND AS COMPARISONS###
ideol.ols<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2),data=combined);summary(ideol.ols)
se<-sqrt(diag(summary(ideol.ols)$cov.unscaled))*summary(ideol.ols)$sigma
xtable(cbind(ideol.ols$coefficients,se,confint(ideol.ols,level=.9)),digits=4)
xtable(cbind(ideol.ols$coefficients,se,confint(ideol.ols,level=.9)),digits=10)
combined$ols.resid<-ideol.ols$residuals

ideol.ols.cub<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2)+I(eastings^2)*northings+eastings*I(northings^2)+I(eastings^3)+I(northings^3),data=combined);summary(ideol.ols.cub)
se<-sqrt(diag(summary(ideol.ols.cub)$cov.unscaled))*summary(ideol.ols.cub)$sigma
xtable(cbind(ideol.ols.cub$coefficients,se,confint(ideol.ols.cub,level=.9)),digits=8)

ideol.ols.notrend<-lm(ideology~age*educ+as.factor(race)*female+as.factor(inc14)+catholic+mormon+orthodox+jewish+islam+mainline+evangelical+rural+ownership+as.factor(empstat),data=combined);summary(ideol.ols.notrend)

###RESIDUAL SEMIVARIOGRAM###
resid.robust<-variog(coords=cbind(combined$eastings,combined$northings), data=combined$ols.resid, estimator.type="modulus")

###A FEW VARIOGRAM MODELS USING WEIGHTED LEAST SQAURES###
wave.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="wave", fix.nugget=FALSE, nugget=700)
wave.fit
21543^(3/21543)* 5638303743/21543 #262087.1

exponential.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="exponential", fix.nugget=FALSE, nugget=700)
exponential.fit
21543^(3/21543)* 5830434615/21543 #271018

matern.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="matern", fix.nugget=FALSE, nugget=700, kappa=1)
matern.fit
21543^(3/21543)* 5652415210/21543 #262743.1

gaussian.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="gaussian", fix.nugget=FALSE, nugget=700)
gaussian.fit
21543^(3/21543)* 5637272224/21543 #262039.2
#      tausq     sigmasq         phi 
#   623.9575   3610.8082 133819.0708 

#graphical output of residual variogram model
pdf('olsGaussian1.pdf',width=5,height=5)
plot(ideol.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,725))
par(new=T)
plot(resid.robust, xlab="", ylab="",axes=F,ylim=c(600,725),pch='+', col='blue')
lines(gaussian.fit, col='blue')
dev.off()

pdf('olsGaussian2.pdf',width=5,height=5)
plot(resid.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(610,640),pch='+', col='blue')
lines(gaussian.fit, col='blue')
dev.off()


###LOAD THE 2000 CENSUS DATA###
census<-read.dta('census2000.dta')
obs<-table(census$statefip); obs

###LOAD COUNTY-BASED DATA###
#USDA urban-rural continuum codes by county
#http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UfCK11OE4xc
usda.0<-read.csv('ruralurbancodes2003b.csv')
#subtract 1 to have a 0 origin
usda.0$rural<-usda.0$X2003.Rural.urban.Continuum.Code-1
#create population variable for rejection sampler
usda.0$X2000.Population[is.na(usda.0$X2000.Population)]<-0
state.maxes<-as.matrix(by(usda.0$X2000.Population,usda.0$State,max))
usda.0$pop2000<-usda.0$X2000.Population/state.maxes[usda.0$State]

#subsetting
usda.1<-subset(usda.0, select=c(FIPS.Code,State,County.Name,rural,pop2000))
usda.1$County.Name<-tolower(usda.1$County.Name)
usda.1$County.Name<-sub(" county", "", usda.1$County.Name)
usda.1$County.Name<-sub(" parish", "", usda.1$County.Name)
usda.1$County.Name<-sub("\\.", "", usda.1$County.Name)
usda.1$County.Name[usda.1$County.Name=="dekalb"]<-"de kalb" 
usda.1$County.Name[usda.1$County.Name=="lamoure"]<-"la moure"
usda.1$County.Name[usda.1$County.Name=="laporte"]<-"la porte"
usda.1$County.Name[usda.1$County.Name=="dupage"]<-"du page"
usda.1$County.Name[usda.1$County.Name=="desoto"]<-"de soto"
usda.1$County.Name[usda.1$County.Name=="dewitt"]<-"de witt"
usda.1$County.Name[!usda.1$County.Name%in%c("baltimore city","st louis city","carson city","charles city","james city")]<-sub(" city","",usda.1$County.Name[!usda.1$County.Name%in%c("baltimore city","st louis city","carson city","charles city","james city")])
usda<-subset(usda.1,State!="AK" & State !="HI")

#ARDA religious census
#http://www.thearda.com/Archive/Files/Descriptions/RCMSCY.asp
#Mainline: http://www.thearda.com/rcms2010/mainline.asp
#Evangelical: http://www.thearda.com/rcms2010/evangelical.asp
arda.0<-read.dta('arda2000.DTA')
arda.0$county<-tolower(arda.0$county)
arda.1<-subset(arda.0,select=c(mainrt,evanrt,cathrt,orthrt,jewrt,islamrt,ldsrt,totrt,fip))
arda.1$other<-1000-arda.1$mainrt-arda.1$evanrt-arda.1$cathrt-arda.1$orthrt-arda.1$jewrt-arda.1$islamrt-arda.1$ldsrt
arda.1$other[arda.1$other<0]<-0
ardaUSDA<-merge(x=arda.1,y=usda,by.x='fip',by.y='FIPS.Code')
ardaUSDA[3074,]<-c(48301,0,73,0,0,0,0,0,73,0,'TX','loving',8,.00000001)

###GENERATE LOCATIONS AND DRAW FROM CENSUS DATA FOR EACH###
##set up nationwide county map to link kriged points with
us.counties<-map("county",fill=FALSE, plot=TRUE, resolution=0)
IDs.2 <- us.counties$names
us.counties.1<-map(database="county",region=IDs.2, fill=TRUE, plot=TRUE, interior=TRUE, exact=TRUE, resolution=0)
a.2<-map2SpatialPolygons(us.counties.1, IDs=IDs.2, proj4string=CRS("+proj=longlat +datum=NAD83"))
#us.map.1<-spTransform(a.1,CRS("+init=esri:102003"))
us.counties.2<-spTransform(a.2,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))

##set up map of Virginia to link kriged points with
#Source for 2008 Virginia congressional maps:
#ftp://ftp2.census.gov/geo/tiger/TIGER2008/51_VIRGINIA/
va.cd.0<-readShapePoly(fn="tl_2008_51_cd110/tl_2008_51_cd110.shp", proj4string=CRS("+proj=longlat +datum=NAD83"))#@!
va.cd.1<-spTransform(va.cd.0,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
#convert to spatial polygons
va.cd.2<-as(va.cd.1,'SpatialPolygons')

##lookup info for counties and states
state.id<-rep(NA,3082)
for(i in 1:3082){state.id[i]<-strsplit(names(us.counties.2),split=",")[[i]][1]}
state.id<-recode(state.id,'"alabama"="AL";"arizona"="AZ";"arkansas"="AR";"california"="CA";"colorado"="CO";"connecticut"="CT";"delaware"="DE";"district of columbia"="DC";"florida"="FL";"georgia"="GA";"idaho"="ID";"illinois"="IL";"indiana"="IN";"iowa"="IA";"kansas"="KS";"kentucky"="KY";"louisiana"="LA";"maine"="ME";"maryland"="MD";"massachusetts"="MA";"michigan"="MI";"minnesota"="MN";"mississippi"="MS";"missouri"="MO";"montana"="MT";"nebraska"="NE";"nevada"="NV";"new hampshire"="NH";"new jersey"="NJ";"new mexico"="NM";"new york"="NY";"north carolina"="NC";"north dakota"="ND";"ohio"="OH";"oklahoma"="OK";"oregon"="OR";"pennsylvania"="PA";"rhode island"="RI";"south carolina"="SC";"south dakota"="SD";"tennessee"="TN";"texas"="TX";"utah"="UT";"vermont"="VT";"virginia"="VA";"washington"="WA";"west virginia"="WV";"wisconsin"="WI";"wyoming"="WY"')
county.id.0<-county.id<-rep(NA,3082)
for(i in 1:3082){county.id[i]<-strsplit(names(us.counties.2),split=",")[[i]][2]}
county.id[county.id=="dade" & state.id=="FL"]<-"miami-dade"
county.id<-sub(":main","",county.id)
county.id<-sub(":penrose","",county.id)
county.id<-sub(":lopez island","",county.id)
county.id<-sub(":orcas island","",county.id)
county.id<-sub(":san juan island","",county.id)
county.id<-sub(":chincoteague","",county.id)
county.id<-sub(":spit","",county.id)
county.id<-sub(":knotts","",county.id)
county.id<-sub(":north","",county.id)
county.id<-sub(":south","",county.id)
county.id[county.id=="obrien" & state.id=="IA"]<-"o'brien"
county.id[county.id=="prince georges" & state.id=="MD"]<-"prince george's"
county.id[county.id=="queen annes" & state.id=="MD"]<-"queen anne's"
county.id[county.id=="st marys" & state.id=="MD"]<-"st mary's"
county.id[county.id=="washington" & state.id=="DC"]<-"district of columbia"

##output from randomly drawing points
religion.K<-matrix(NA,nrow=1,ncol=8)
colnames(religion.K)<-c('other','cath','mormon','ortho','jewish','islam','main','evan')

##set-up covariate simulation paramters for Virginia
fem.dist<-c(.507,.5,.52,.507,.508,.518,.515,.508,.503,.497,.506)
race.dist<-matrix(data=c(527537,150433,81621,
						416345,142105,82913,
						242970,375009,52744,
						436237,237257,49052,
						478757,151448,35290,
						567102,75400,40657,
						551792,123252,64358,
						389716,91657,194665,
						597423,15980,33440,
						549273,66069,190165,
						432337,91822,234263),nrow=11, ncol=3, byrow=TRUE)
age.bin.dist<-matrix(data=c(.102,.13,.147,.154,.105,.065,.055,
						.119,.137,.146,.146,.098,.057,.053,
						.128,.133,.142,.139,.096,.059,.053,
						.103,.131,.143,.158,.11,.063,.05,
						.115,.118,.137,.145,.124,.087,.07,
						.119,.123,.131,.145,.117,.08,.078,
						.083,.137,.147,.153,.118,.067,.058,
						.07,.153,.187,.159,.115,.06,.047,
						.126,.121,.135,.141,.125,.086,.075,
						.084,.138,.163,.156,.105,.053,.033,
						.085,.118,.158,.153,.131,.057,.032),nrow=11, ncol=7, byrow=TRUE)
educ.dist<-matrix(data=c(.111,.28,.306,.188,.115,
						.093,.271,.351,.182,.103,
						.178,.297,.305,.134,.085,
						.166,.298,.308,.153,.075,
						.209,.307,.262,.129,.093,
						.164,.326,.264,.157,.089,
						.114,.251,.274,.235,.126,
						.104,.125,.182,.296,.293,
						.239,.312,.283,.103,.063,
						.101,.181,.227,.289,.202,
						.086,.151,.236,.287,.240),nrow=11, ncol=5, byrow=TRUE)
own.dist<-c(.726,.649,.521,.731,.703,.686,.738,.569,.711,.736,.776)
emp.dist<-matrix(data=c(.67,.03,.30,
						.695,.032,.273,
						.617,.055,.328,
						.626,.039,.335,
						.58,.036,.384,
						.596,.027,.378,
						.67,.03,.3,
						.735,.026,.239,
						.522,.032,.445,
						.724,.029,.248,
						.722,.026,.252),nrow=11, ncol=3, byrow=TRUE)
income<-c(80132,70651,50811,67929,53097,59042,80514,111455,45863,108760,120135)
inc.rate<-log(2)/income

#Virginia kriges first
zip.va<-subset(zip,state=="VA")
N<-50000

#draw zip codes in proportion to population
zip.va$k.va.zip<-rmultinom(1,size=N,prob=zip.va$zipPop)
x.va<-rep(zip.va$longitude,zip.va$k.va.zip)
y.va<-rep(zip.va$latitude,zip.va$k.va.zip)
va.coords<-as.data.frame(cbind(x.va,y.va))

#reproject coordinates
coordinates(va.coords)<-c('x.va','y.va')
proj4string(va.coords)<-CRS("+proj=longlat +datum=NAD83")
va.coords.2<-spTransform(va.coords,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
colnames(va.coords.2@coords)<-c('eastings','northings')
va.coords.3<-as.data.frame(va.coords.2@coords)

radii<-sqrt(rep(zip.va$zipLandKM,zip.va$k.va.zip)/pi)#radii to jitter by
radii[radii==0]<-.1
for(i in 1:length(va.coords.3$eastings)){
	va.coords.3$eastings[i]<-jitter(va.coords.3$eastings[i],amount=radii[i])
	va.coords.3$northings[i]<-jitter(va.coords.3$northings[i],amount=radii[i])
	}
dim(va.coords.3)
plot(x=va.coords.3$eastings,y=va.coords.3$northings,pch='.')

	#find the right polygon
	joint<-SpatialPoints(va.coords.3,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
	pol.no<-joint%over%va.cd.2
	cd<-recode(pol.no,'1=6;2=9;3=10;4=1;5=11;6=5;7=7;8=8;9=4;10=3;11=2')
	sum(table(cd))
	county.no<-joint%over%us.counties.2
	linkup<-rep(NA,N)
	for(i in 1:N){
		linkup[i]<-ifelse(!is.na(state.id[county.no[i]]), which(ardaUSDA$County.Name==county.id[county.no[i]] & ardaUSDA$State==state.id[county.no[i]]), NA)
		}
	spat.info.0<-cbind(va.coords.3$eastings,va.coords.3$northings,cd,county.id[county.no],state.id[county.no],ardaUSDA$rural[linkup],51)
	spat.info.1<-spat.info.0[!is.na(spat.info.0[,3]) & !is.na(spat.info.0[,4]),]
	spat.info<-spat.info.2<-spat.info.1[order(spat.info.1[,3]),]
	linkup.2<-linkup[!is.na(spat.info.0[,3]) & !is.na(spat.info.0[,4])]
	linkup.2<-linkup.2[order(spat.info.1[,3])]
	
	#record district codes
	no.dist<-table(spat.info.2[,3])
	no.dist[paste(c(1:11))]

#plug in covariate values
covariates<-matrix(NA,nrow=dim(spat.info)[1],ncol=8)
colnames(covariates)<-colnames(census)
acs<-rep(1,dim(spat.info)[1])

for(i in 1:dim(spat.info)[1]){
	covariates[i,1]<-51 #VA FIPS code
	
	#draw religion
	r.1<-rmultinom(1,1,prob=c(ardaUSDA$other[linkup.2[i]],ardaUSDA$cathrt[linkup.2[i]],ardaUSDA$ldsrt[linkup.2[i]],ardaUSDA$orthrt[linkup.2[i]],ardaUSDA$jewrt[linkup.2[i]],ardaUSDA$islamrt[linkup.2[i]],ardaUSDA$mainrt[linkup.2[i]],ardaUSDA$evanrt[linkup.2[i]]))
	religion.K<-rbind(religion.K,t(r.1))

	#draw from 2008 ACS
	p.2<-as.numeric(spat.info[i,3])
	fem.prob<-fem.dist[p.2]
	race.prob<-as.numeric(race.dist[p.2,])
	age.prob<-as.numeric(age.bin.dist[p.2,])
	educ.prob<-as.numeric(educ.dist[p.2,])
	own.prob<-own.dist[p.2]
	emp.prob<-as.numeric(emp.dist[p.2,])
	inc.prob<-inc.rate[p.2]

	#draw female
	covariates[i,8]<-rbinom(1,1,fem.prob)

	#draw race: white, black, other
	covariates[i,4]<-which.max(rmultinom(1,1,prob=race.prob))

	#draw age
	#assume the maximum is 98, which is what we see in the CCES
	#choose a bin: 18-24, 25-34, 35-44, 45-54, 55-64, 65-74, 75+
	age.bin<-which.max(rmultinom(1,1,prob=age.prob))
		if(age.bin==1){	age<-which.max(rmultinom(1,1,prob=c(138,122,130,153,125,142,170)))+17
			}else if(age.bin==2){age<-which.max(rmultinom(1,1,prob=c(198,221,240,263,225,287,288,279,325,313)))+24
				}else if(age.bin==3){age<-which.max(rmultinom(1,1,prob=c(282,269,321,371,317,378,404,402,447,481)))+34
					}else if(age.bin==4){age<-which.max(rmultinom(1,1,prob=c(525,532,581,552,580,524,626,624,641,611)))+44
						}else if(age.bin==5){age<-which.max(rmultinom(1,1,prob=c(503,472,459,462,442,485,512,449,375,340)))+54
							}else if(age.bin==6){age<-which.max(rmultinom(1,1,prob=c(550,557,476,440,338,333,275,266,218,194)))+64
								}else{age<-which.max(rmultinom(1,1,prob=c(169,144,141,112,111,67,59,44,28,25,15,17,12,5,5,4,2,2,1,1,1,1,1,1)))+74}
	covariates[i,3]<-age

	#draw education: CCES: 0=no hs; 1=hs; 2= some college; 3=2 year; 4=4 year; 5=post grad
	#ACS pools "some college" and "2 year." We assume the same split as in the CCES. 23.91% of "some college" have a 2-year degree.
	covariates[i,5]<-which.max(rmultinom(1,1,prob=c(educ.prob[1],educ.prob[2],(.7608789*educ.prob[3]),(.2391211*educ.prob[3]),educ.prob[4],educ.prob[5])))-1

	#draw ownership
	covariates[i,2]<-rbinom(1,1,own.prob)

	#draw employment status: employed, unemployed, not in labor force
	covariates[i,6]<-which.max(rmultinom(1,1,prob=emp.prob))

	#draw income from an exponential distribution with the district median
	inc.draw<-round(rexp(1,rate=inc.prob))
	covariates[i,7]<-recode(inc.draw,'lo:9999=1;10000:14999=2;15000:19999=3;20000:24999=4;25000:29999=5;30000:39999=6;40000:49999=7;50000:59999=8;60000:69999=9;70000:79999=10;80000:99999=11;100000:119999=12;120000:149999=13;150000:hi=14')
	}
religion.K<-religion.K[-1,]

###now the states###
N<-200000
	
#draw zip codes in proportion to population
zip$k.st.zip<-rmultinom(1,size=N,prob=zip$zipPop)
x.st<-rep(zip$longitude,zip$k.st.zip)
y.st<-rep(zip$latitude,zip$k.st.zip)
st.coords<-as.data.frame(cbind(x.st,y.st))

#reproject coordinates
coordinates(st.coords)<-c('x.st','y.st')
proj4string(st.coords)<-CRS("+proj=longlat +datum=NAD83")
st.coords.2<-spTransform(st.coords,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
colnames(st.coords.2@coords)<-c('eastings','northings')
st.coords.3<-as.data.frame(st.coords.2@coords)

radii.st<-sqrt(rep(zip$zipLandKM,zip$k.st.zip)/pi)#radii to jitter by
radii.st[radii.st==0]<-.1
for(i in 1:length(st.coords.3$eastings)){
	st.coords.3$eastings[i]<-jitter(st.coords.3$eastings[i],amount=radii.st[i])
	st.coords.3$northings[i]<-jitter(st.coords.3$northings[i],amount=radii.st[i])
	}
dim(st.coords.3)
plot(x=st.coords.3$eastings,y=st.coords.3$northings,pch='.')

	#find the right polygon
	joint.2<-SpatialPoints(st.coords.3,CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs"))
	county.no.2<-joint.2%over%us.counties.2
	linkup.B<-rep(NA,N)
	for(i in 1:N){
		linkup.B[i]<-ifelse(!is.na(state.id[county.no.2[i]]), which(ardaUSDA$County.Name==county.id[county.no.2[i]] & ardaUSDA$State==state.id[county.no.2[i]]), NA)
		}
	spat.info.B.0<-cbind(st.coords.3$eastings,st.coords.3$northings,NA,county.id[county.no.2],state.id[county.no.2],ardaUSDA$rural[linkup.B],trunc(as.numeric(ardaUSDA$fip[linkup.B])/1000))
	spat.info.B.1<-spat.info.B.0[!is.na(spat.info.B.0[,4]),]
	spat.info.B.2<-spat.info.B.1[order(as.numeric(spat.info.B.1[,7])),]
	linkup.B.2<-linkup.B[!is.na(spat.info.B.0[,4])]
	linkup.B.2<-linkup.B.2[order(as.numeric(spat.info.B.1[,7]))]
	spat.info.B<-spat.info.B.2[!is.na(spat.info.B.2[,7]),]
	length(linkup.B.2); dim(spat.info.B)
	
	#state summary
	no.state<-table(spat.info.B[,7])
	no.state[paste(c(1,4:6,8:13,16:42,44:51,53:56))]

###draw covariates for each kriged point###
covariates.B<-matrix(NA,nrow=1,ncol=8)
colnames(covariates.B)<-colnames(census)

for(i in c(1,4:6,8:13,16:42,44:51,53:56)){
	draw<-sample(1:obs[paste(i)],no.state[paste(i)],replace=TRUE)
	covariates.B<-rbind(covariates.B,census[census$statefip==paste(i),][draw,])
	}
covariates.B<-covariates.B[-1,]

#check that number of structural observations and number of spatial points is the same
spat.info.B<-spat.info.B[!is.na(spat.info.B[,4]),]
dim(covariates.B); dim(spat.info.B)

##output from randomly drawing points
religion.K.B<-matrix(NA,nrow=dim(spat.info.B)[1],ncol=8)
colnames(religion.K.B)<-c('other','cath','mormon','ortho','jewish','islam','main','evan')


for(i in 1:dim(spat.info.B)[1]){
	a<-c(as.numeric(ardaUSDA$other[linkup.B.2[i]]),as.numeric(ardaUSDA$cathrt[linkup.B.2[i]]),as.numeric(ardaUSDA$ldsrt[linkup.B.2[i]]),as.numeric(ardaUSDA$orthrt[linkup.B.2[i]]),as.numeric(ardaUSDA$jewrt[linkup.B.2[i]]),as.numeric(ardaUSDA$islamrt[linkup.B.2[i]]),as.numeric(ardaUSDA$mainrt[linkup.B.2[i]]),as.numeric(ardaUSDA$evanrt[linkup.B.2[i]]))
	if(is.na(sum(a))){cat('Error',linkup.B.2[i],'county fip', ardaUSDA$fip[linkup.B.2[i]], '\n'); religion.K.B[i,]<-c(1,0,0,0,0,0,0,0)}
		else{
	religion.K.B[i,]<-rmultinom(1,1,prob=c(ardaUSDA$other[linkup.B.2[i]],ardaUSDA$cathrt[linkup.B.2[i]],ardaUSDA$ldsrt[linkup.B.2[i]],ardaUSDA$orthrt[linkup.B.2[i]],ardaUSDA$jewrt[linkup.B.2[i]],ardaUSDA$islamrt[linkup.B.2[i]],ardaUSDA$mainrt[linkup.B.2[i]],ardaUSDA$evanrt[linkup.B.2[i]]))
		}
		}

##cleanup of the sampled covariates and kriged points
acs<-append(acs,rep(0,dim(spat.info.B)[1]))
religion.all<-rbind(religion.K,religion.K.B)
covariates.all<-rbind(covariates,covariates.B)
spat.info.all<-rbind(spat.info,spat.info.B)
colnames(spat.info.all)[4:7]<-c('county','state','rural','fips')
covariates<-cbind(covariates.all,spat.info.all,religion.all,acs)
dim(covariates)
colnames(covariates)[9:10]<-c('east.K','north.K')

write.csv(covariates,"krigedPointsSTATE.csv",row.names=F)
